library(here)
library(ggplot2)
library(plotly)
library(tidyverse)
library(treemapify)
# Use the project root reported by here() as knitr's root directory.
curr.dir <- here()
knitr::opts_knit$set(root.dir = curr.dir)
# Echo the root directory that knitr now has configured.
knitr::opts_knit$get("root.dir")
## [1] "/Users/luischavesrodriguez/OneDrive - Imperial College London/ExtratimeWork/COVID19"
# Load the per-observation case data and derive the active case count:
# confirmed cases that are neither recorded as deaths nor as recovered.
up.to.date <- read.csv("NovelCOVID/covid_19_data.csv")
up.to.date$Active <- up.to.date$Confirmed - up.to.date$Deaths -
  up.to.date$Recovered

# Country -> continent lookup, extended with a few extra region names
# (presumably spellings used by the case data that the lookup file lacks --
# TODO confirm). The extra rows are built as a data frame so they are
# matched to the lookup by column name.
countryToContinent <- read.csv("Countries-Continents.csv")
countryToContinent <- rbind(
  countryToContinent,
  data.frame(
    Continent = c(rep("Asia", 5), rep("Europe", 2)),
    Country = c(
      "Hong Kong", "Mainland China",
      "Macau", "Taiwan", "South Korea",
      "UK", "Czech Republic"
    ),
    stringsAsFactors = FALSE
  )
)
# Force plain character columns regardless of how read.csv typed them.
countryToContinent$Continent <- as.character(countryToContinent$Continent)
countryToContinent$Country <- as.character(countryToContinent$Country)

# Left join: keep every case row; rows whose country has no lookup entry get
# an NA Continent. TRUE/FALSE are spelled out because T and F are ordinary
# variables that can be reassigned.
up.to.date <- merge(
  up.to.date, countryToContinent,
  by.x = "Country.Region", by.y = "Country",
  all.x = TRUE, sort = FALSE
)
Summarise the counts per continent, country and province, and show the 20 rows with the highest number of active cases:
# Collapse the time series to one row per continent / country / province.
# Confirmed, Deaths and Recovered are cumulative, so their maximum is the
# latest figure. Active is NOT cumulative (it falls as cases resolve), so it
# is re-derived from the latest cumulative figures rather than taken as its
# historical peak.
sum.table <- up.to.date %>%
  select(-c(Last.Update, SNo, ObservationDate)) %>%
  group_by(Continent, Country.Region, Province.State) %>%
  summarise(
    Confirmed = max(Confirmed),
    Deaths = max(Deaths),
    Recovered = max(Recovered)
  ) %>%
  # Drop the leftover grouping so later verbs operate on the whole table.
  ungroup() %>%
  mutate(Active = Confirmed - Deaths - Recovered) %>%
  arrange(desc(Active))

# Render the 20 rows with the most active cases.
knitr::kable(sum.table %>% head(20))
| Continent | Country.Region | Province.State | Confirmed | Deaths | Recovered | Active |
|---|---|---|---|---|---|---|
| Europe | Italy | | 80589 | 8215 | 10361 | 62013 |
| Asia | Mainland China | Hubei | 67801 | 3169 | 61201 | 50633 |
| Europe | Spain | | 57786 | 4365 | 7015 | 46406 |
| Europe | Germany | | 43938 | 267 | 5673 | 37998 |
| North America | US | New York | 37877 | 385 | 0 | 37492 |
| Europe | France | | 29155 | 1696 | 4948 | 22511 |
| Europe | France | French Polynesia | 19874 | 860 | 2200 | 16814 |
| Asia | Iran | | 29406 | 2234 | 10457 | 16715 |
| Europe | France | France | 14282 | 562 | 12 | 13708 |
| Europe | Switzerland | | 11811 | 191 | 131 | 11489 |
| Europe | UK | | 11658 | 578 | 135 | 10945 |
| Asia | South Korea | | 9241 | 131 | 4144 | 7577 |
| Europe | Netherlands | | 7431 | 434 | 3 | 6994 |
| North America | US | New Jersey | 6876 | 81 | 1 | 6795 |
| Europe | Austria | | 6909 | 49 | 112 | 6748 |
| Europe | Belgium | | 6235 | 220 | 675 | 5340 |
| Europe | UK | United Kingdom | 5018 | 233 | 65 | 4720 |
| North America | US | California | 3899 | 81 | 6 | 3818 |
| Asia | Turkey | | 3629 | 75 | 26 | 3528 |
| Europe | Netherlands | Netherlands | 3631 | 136 | 2 | 3493 |
# Interactive, faceted horizontal bar chart (one panel per metric) of the
# Confirmed, Deaths and Recovered counts for the first 20 rows of sum.table,
# filled by continent. Rows with a blank province are labelled with their
# country name.
plot1 <- (
  sum.table %>%
    mutate(
      Province.State = ifelse(
        Province.State == "",
        as.character(Country.Region),
        as.character(Province.State)
      )
    ) %>%
    select(-Active) %>%
    head(20) %>%
    pivot_longer(
      -c(Country.Region, Province.State, Continent),
      names_to = "Metric",
      values_to = "Amount"
    ) %>%
    ggplot(
      aes(x = reorder(Province.State, Amount), y = Amount, fill = Continent)
    ) +
    geom_col(color = 'black') +
    facet_wrap(~Metric) +
    coord_flip() +
    theme_minimal()
) %>%
  ggplotly()
plot1
# Interactive horizontal bar chart of the first 20 rows of sum.table in which
# the Confirmed, Deaths and Recovered bars of each region are drawn over one
# another (position = "identity") and filled by metric. Rows with a blank
# province are labelled with their country name.
plot2 <- (
  sum.table %>%
    mutate(
      Province.State = ifelse(
        Province.State == "",
        as.character(Country.Region),
        as.character(Province.State)
      )
    ) %>%
    select(-Active) %>%
    head(20) %>%
    pivot_longer(
      -c(Country.Region, Province.State, Continent),
      names_to = "Metric",
      values_to = "Amount"
    ) %>%
    ggplot(aes(x = reorder(Province.State, Amount), y = Amount)) +
    geom_col(aes(fill = Metric), position = "identity") +
    coord_flip() +
    theme_minimal()
) %>%
  ggplotly()
plot2
# Static treemap of deaths for the first 10 rows of sum.table: tile area is
# the death count, tiles are grouped and filled by continent, and each tile
# is labelled "<region>:<newline><deaths>".
sum.table %>%
  mutate(
    Province.State = ifelse(
      Province.State == "",
      as.character(Country.Region),
      as.character(Province.State)
    )
  ) %>%
  select(-Active) %>%
  head(10) %>%
  ggplot(
    aes(
      area = Deaths,
      label = paste0(Province.State, ":\n", Deaths),
      fill = Continent,
      subgroup = Continent
    )
  ) +
  geom_treemap() +
  geom_treemap_text(colour = "white", place = "topleft") +
  coord_fixed() +
  ggtitle("Number of deaths by country\nin top 10 countries with more active cases")
# Static treemap of confirmed cases for the first 10 rows of sum.table: tile
# area is the confirmed count, tiles are grouped and filled by continent, and
# each tile is labelled "<region>:<newline><confirmed>".
sum.table %>%
  mutate(
    Province.State = ifelse(
      Province.State == "",
      as.character(Country.Region),
      as.character(Province.State)
    )
  ) %>%
  head(10) %>%
  ggplot(
    aes(
      area = Confirmed,
      label = paste0(Province.State, ":\n", Confirmed),
      fill = Continent,
      subgroup = Continent
    )
  ) +
  geom_treemap() +
  geom_treemap_text(colour = "white", place = "topleft") +
  coord_fixed() +
  ggtitle("Number of confirmed cases by country\nin top 10 countries with more active cases")
# Store the province fallback in sum.table itself: a blank Province.State is
# replaced by the row's country name.
sum.table <- mutate(
  sum.table,
  Province.State = ifelse(
    Province.State == "",
    as.character(Country.Region),
    as.character(Province.State)
  )
)
# Assemble the label / parent table consumed by the plotly treemaps.
df.plotly <- sum.table
# Rows without a continent are bucketed under the label "Missing".
df.plotly$Continent <- ifelse(
  is.na(df.plotly$Continent),
  "Missing",
  df.plotly$Continent
)

# Top level: one row per continent with summed counts and an empty parent.
conts <- df.plotly %>%
  group_by(Continent) %>%
  summarise(
    Deaths = sum(Deaths),
    Confirmed = sum(Confirmed),
    Recovered = sum(Recovered),
    Active = sum(Active)
  ) %>%
  mutate(parent = "") %>%
  rename(labels = Continent)

# Second level: one row per country with summed counts, parented by its
# continent.
countrs <- df.plotly %>%
  group_by(Country.Region) %>%
  summarise(
    Deaths = sum(Deaths),
    Confirmed = sum(Confirmed),
    Recovered = sum(Recovered),
    Active = sum(Active),
    parent = unique(Continent)
  ) %>%
  rename(labels = Country.Region)

# Province-level rows (label = province, parent = country), with the columns
# reordered so that labels come first and parent last. This table is not
# bound into df.plotly below.
regs <- df.plotly %>%
  ungroup() %>%
  select(-Continent) %>%
  rename(labels = Province.State, parent = Country.Region)
regs <- regs[, c(2:6, 1)]

# The plotted hierarchy holds continents and countries only, restricted to
# rows with more than 100 active cases.
df.plotly <- rbind(conts, countrs)
toPlot <- filter(df.plotly, Active > 100)
# Draw a plotly treemap over a label / parent hierarchy.
#   values: numeric vector with one entry per row of `data`; sets tile size.
#   data:   table providing the `labels` and `parent` columns
#           (defaults to toPlot, looked up when the function is called).
# Returns the plotly object.
# NOTE(review): the continent rows of toPlot hold sums over their countries,
# while plotly's default branchvalues = "remainder" treats a parent's value as
# extra to its children -- confirm whether branchvalues = "total" is wanted
# for the count plots.
plotTreemap <- function(values, data = toPlot) {
  plot_ly(
    type = "treemap",
    values = values,
    labels = data$labels,
    parents = data$parent
    # textinfo="label+value+percent parent+percent entry+percent root",
    # domain=list(column=0)
  )
}

# Each plot is assigned to plot4 and printed in turn, so plot4 ends up holding
# the last one (recovered / confirmed).

# Tiles sized by deaths.
plot4 <- plotTreemap(toPlot$Deaths)
plot4

# Tiles sized by confirmed cases.
plot4 <- plotTreemap(toPlot$Confirmed)
plot4

# Tiles sized by recovered cases.
plot4 <- plotTreemap(toPlot$Recovered)
plot4

# Tiles sized by the ratio of deaths to confirmed cases.
plot4 <- plotTreemap(toPlot$Deaths / toPlot$Confirmed)
plot4

# Tiles sized by the ratio of recovered to confirmed cases.
plot4 <- plotTreemap(toPlot$Recovered / toPlot$Confirmed)
plot4